###TRACE end-of-day data
rm(list=ls()) 
print(Sys.time())
# load libraries
library(stringr)
library(data.table)
library(lubridate)
library(tictoc)
library(dplyr)
library(dtplyr)
library(zoo)
library(openxlsx)
library(taRifx)
library(haven)
library(sandwich)
library(lmtest)
#library(RPostgres)

options(max.print=999999)



# Sets the current path
current_path <- "XXX"
setwd(current_path)


#setwd("~/Corporate_Bond_TRACE/data/TRACE_standard")

####################
TRACE_BTDS<-fread("TRACE_BTDS.csv")
BTDS_144A<-fread("BTDS_144A.csv")
AD<-fread("AD.csv")
bond_master<-fread("bond_master.csv")

TradeData<-rbind(TRACE_BTDS,
                 BTDS_144A,
                 AD)
TradeData[,`:=`(Status="")]
TradeData[trc_st=="M",`:=`(Status="Trade")]
TradeData[trc_st=="N",`:=`(Status="Cancel")]
TradeData[trc_st=="O",`:=`(Status="Correction")]
print(table(TradeData$Status))

###check duplicates
TradeData[,`:=`(checkduplicate=.N),
          by=c(colnames(TradeData))]
table(TradeData$checkduplicate)
TradeData[,`:=`(checkduplicate=NULL)]

######################################################
###[1] Filter the data
TradeData_Cancel<-TradeData[Status=="Cancel"]
TradeData_Correction<-TradeData[Status=="Correction"]
TradeData_Cancel[,`:=`(msg_seq_nb=NULL)]
TradeData_Correction[,`:=`(msg_seq_nb=NULL)]
setnames(TradeData_Cancel,c("orig_msg_seq_nb"),c("msg_seq_nb"))
setnames(TradeData_Correction,c("orig_msg_seq_nb"),c("msg_seq_nb"))
TradeData_Trade<-TradeData[Status=="Trade"]
TradeData_Trade[,`:=`(orig_msg_seq_nb=NULL)]

nrow(TradeData_Trade)
nrow(TradeData_Cancel)
TradeData_Trade<-
  TradeData_Trade[!TradeData_Cancel, on=c("msg_seq_nb","trd_exctn_dt","cusip_id")]
nrow(TradeData_Trade)

nrow(TradeData_Correction)

# A<-inner_join(TradeData_Trade,
#               TradeData_Correction,
#               by=c("msg_seq_nb","trd_exctn_dt","cusip_id"))
# 
# ###We need to add back correction messages, since there are lower than 50% correction message with same 
# ###rptd_pr or quantity with original message.
# A<-as.data.table(A)
# 
# nrow(A[rptd_pr.x==rptd_pr.y&ascii_rptd_vol_tx.x==ascii_rptd_vol_tx.y])/nrow(A)

TradeData_Trade<-
  TradeData_Trade[!TradeData_Correction, on=c("msg_seq_nb","trd_exctn_dt","cusip_id")]
nrow(TradeData_Trade)

TradeData_filter<-rbind(TradeData_Trade,
                        TradeData_Correction)
TradeData_filter[,`:=`(msg_seq_nb=NULL)]

setnames(TradeData_filter,
         c("trans_dt","ascii_rptd_vol_tx","frmt_cd"),
         c("dissemination_date","Quantity(ParVal)","actural_ind"))

TradeData_filter[,`:=`(`Quantity(ParVal)_num`=as.numeric(`Quantity(ParVal)`))]
TradeData_filter[,`:=`(`dum_Quantity(ParVal)_5MM`=0,
                                `dum_Quantity(ParVal)_1MM`=0)]
TradeData_filter[`Quantity(ParVal)`=="5MM+",
                          `:=`(`dum_Quantity(ParVal)_5MM`=1)]
TradeData_filter[`Quantity(ParVal)`=="1MM+",
                          `:=`(`dum_Quantity(ParVal)_1MM`=1)]

print(nrow(TradeData_filter[is.na(`Quantity(ParVal)_num`)]))
print(nrow(TradeData_filter[`dum_Quantity(ParVal)_5MM`==1|`dum_Quantity(ParVal)_1MM`==1]))                                

TradeData_filter[,datetime_exctn := 
                   paste0(as.character(trd_exctn_dt),"T",str_pad(trd_exctn_tm,width=8,side="left",pad="0"))]

TradeData_filter[,`:=`(trd_exctn_dt_s=as.Date(paste0(substr(as.character(trd_exctn_dt),1,4),"-",
                                              substr(as.character(trd_exctn_dt),5,6),"-",
                                              substr(as.character(trd_exctn_dt),7,8))),
                trade_type="")]

print(table(TradeData_filter$rptg_party_type))
print(table(TradeData_filter$contra_party_type))

###There are only S trades for DD trades.
print(table(as.matrix(TradeData_filter[((rptg_party_type=="D")|(rptg_party_type=="T"))&
                                                  ((contra_party_type=="D")|(contra_party_type=="T")),
                                                c("side"),
                                                with=FALSE])))

###"A" is treated as customer "C"; "T" is treated as FINRA member.
print(table(as.matrix(TradeData_filter[((rptg_party_type=="D")|(rptg_party_type=="T"))&
                                                  (contra_party_type=="A"),
                                                c("side"),
                                                with=FALSE])))
###"T" means ATS trades
print(table(as.matrix(TradeData_filter[(rptg_party_type=="T")|
                                                  (contra_party_type=="T"),
                                                c("ATS_indicator"),
                                                with=FALSE])))

###Why are there still ATS==Y? because some member firms report on behalf of ATS or ATS is reporting on behalf of others?
print(table(as.matrix(TradeData_filter[(rptg_party_type!="T")&
                                                  (contra_party_type!="T"),
                                                c("ATS_indicator"),
                                                with=FALSE])))

TradeData_filter[(contra_party_type=="D"|contra_party_type=="T")&
                            (side=="S"),`:=`(trade_type="D2D")]
TradeData_filter[(contra_party_type=="C"|contra_party_type=="A")&
                            (side=="S"),`:=`(trade_type="D2C")]
TradeData_filter[(contra_party_type=="C"|contra_party_type=="A")&
                            (side=="B"),`:=`(trade_type="C2D")]

print(table(TradeData_filter$trade_type))
# ###no party type trades.
# print(table(TradeData_filter[trade_type=="",
#                                       c("Contra Party Type","Reporting Party Type","side"),
#                                       with=FALSE]))

###for trade_type=="", we can use Remuneration and side to decide whether they are DC or DD.
###Remuneration="" only for DD trades and also DD trades cannot be B.
###For Remuneration!="", then can be DC/DA trades.
# TradeData_filter[(trade_type=="")&
#                             (side=="S")&
#                             (Remuneration==""),
#                           `:=`(rptg_party_type="D",
#                                contra_party_type="D",
#                                trade_type="D2D")]
# TradeData_filter[(trade_type=="")&
#                             (Remuneration!="")&
#                             (side=="S"),
#                           `:=`(rptg_party_type="D",
#                                contra_party_type="DC(A)",
#                                trade_type="D2C")]
# TradeData_filter[(trade_type=="")&
#                             (Remuneration!="")&
#                             (side=="B"),
#                           `:=`(rptg_party_type="D",
#                                contra_party_type="DC(A)",
#                                trade_type="C2D")]
# print(table(TradeData_filter[,
#                                       c("trade_type"),
#                                       with=FALSE]))

print(summary(TradeData_filter$rptd_pr))
print(summary(as.matrix(TradeData_filter[!is.na(`Quantity(ParVal)_num`),
                                                  c("Quantity(ParVal)_num"),
                                                  with=FALSE])))

message(paste0("number of obs in combined trade data is ",nrow(TradeData_filter)))
message(" ")
message(paste0("number of bonds is ",
               nrow(unique(as.matrix(TradeData_filter[,c("cusip_id"),with=FALSE])))))
message(" ")

###############################################
###[2] Merge with bond_master fundamentals
setnames(bond_master,c("bsym_id","sub_prdct_type"),
         c("bsym","sub_prd_type"))
merge_index<-c("cusip_id","bond_sym_id","company_symbol","bsym","sub_prd_type")
bond_master[,`:=`(count_seq_cusip=sequence(.N)),
            by=c(merge_index)]
summary(bond_master$count_seq_cusip)
bond_master[,`:=`(num_grade=0)]
bond_master[grade=="I",`:=`(num_grade=1)]
bond_master[,`:=`(mean_num_grade=mean(num_grade,na.rm = TRUE),
                  mean_cpn_rt=mean(cpn_rt,na.rm = TRUE)),
            by=c(merge_index)]
summary(bond_master$mean_num_grade)

bond_master<-bond_master[count_seq_cusip==1]
bond_master[,`:=`(mean_grade="")]
bond_master[mean_num_grade>=0.66,`:=`(mean_grade="I")]
bond_master[mean_num_grade<0.66,`:=`(mean_grade="H")]

table(bond_master$mean_grade)

nrow(TradeData_filter)
TradeData_filter<-left_join(TradeData_filter,
                 bond_master,
                 by=c(merge_index))

TradeData_filter<-as.data.table(TradeData_filter)

TradeData_filter[,`:=`(num_grade=NULL)]
nrow(TradeData_filter)

TradeData_filter[,`:=`(mtrty_dt_s=paste0(substr(as.character(mtrty_dt),1,4),"-",
                                substr(as.character(mtrty_dt),5,6),"-",
                                substr(as.character(mtrty_dt),7,8)))]

###############################################
###[3] Merge with FISD_fundamentals by maturity dates
FISD_fundamentals<-fread("Updated2020_FISD_Issue.csv")

setnames(FISD_fundamentals,tolower(colnames(FISD_fundamentals)))
setnames(FISD_fundamentals,
         c("complete_cusip"),
         c("cusip_id"))

###FISD unique by cusip_id and maturity or simply cusip_id
FISD_fundamentals[,`:=`(count_seq=sequence(.N)),
                  by=c("cusip_id","maturity")]
table(FISD_fundamentals$count_seq)

FISD_fundamentals[,`:=`(mtrty_dt_s=as.Date(paste0(substr(as.character(maturity),1,4),"-",
                                                  substr(as.character(maturity),5,6),"-",
                                                  substr(as.character(maturity),7,8))))]

setcolorder(FISD_fundamentals,c("cusip_id","amount_outstanding","offering_amt","offering_date","maturity","mtrty_dt_s"))

FISD_fundamentals_variablelist<-
  c("cusip_id","amount_outstanding","offering_amt","offering_date","maturity","mtrty_dt_s",
    "issue_id","issuer_id","prospectus_issuer_name","issuer_cusip","issue_cusip","issue_name",
    "canadian", "oid", "foreign_currency", "slob",
    "convertible","yankee","rule_144a","treasury_spread","treasury_maturity","denomination","isin","perpetual","sec_cusip",
    "industry_group","industry_code","sic_code","naics_code")
FISD_fundamentals<-
  FISD_fundamentals[,c(FISD_fundamentals_variablelist),with=FALSE]

Trial<-
  left_join(TradeData_filter,
            FISD_fundamentals,
            by=c("cusip_id"))
Trial<-as.data.table(Trial)
setcolorder(Trial,
            c("mtrty_dt_s.x","mtrty_dt_s.y","offering_date","maturity","amount_outstanding","offering_amt"))

###temporarily save.
fwrite(Trial,
       "Trial.csv")

Trial<-fread("Trial.csv")

summary(as.matrix(Trial[,c("amount_outstanding","offering_amt",
                                                       "offering_date",
                                                       "maturity"),
                                                     with=FALSE]))
###amountoutstanding equals amountoutstanding_thousands
summary(Trial$amount_outstanding)
summary(Trial$offering_amt)
summary(Trial$offering_date)
summary(Trial$maturity)


TradeData_filter_masterfundamental_FISDfundamental<-as.data.table(Trial)

TradeData_filter_masterfundamental_FISDfundamental[,
                                                   `:=`(amountoutstanding_thousands=amount_outstanding)]

summary(TradeData_filter_masterfundamental_FISDfundamental$amountoutstanding_thousands)

TradeData_filter_masterfundamental_FISDfundamental[,`:=`(offering_date_s_FISD=as.character(offering_date))]

TradeData_filter_masterfundamental_FISDfundamental[,
            `:=`(offering_dt_new=(as.Date(as.character(offering_date), "%Y%m%d")))]

nrow(TradeData_filter_masterfundamental_FISDfundamental[is.na(offering_dt_new)])

###time_to_maturity and time_to_offering
TradeData_filter_masterfundamental_FISDfundamental[,
                 time_to_offering:=as.Date(trd_exctn_dt_s)-as.Date(offering_dt_new)]
TradeData_filter_masterfundamental_FISDfundamental[,
                 time_to_maturity:=as.integer(as.Date(mtrty_dt_s.x)-as.Date(trd_exctn_dt_s))]

summary(TradeData_filter_masterfundamental_FISDfundamental$time_to_offering)
summary(TradeData_filter_masterfundamental_FISDfundamental$time_to_maturity)

setcolorder(TradeData_filter_masterfundamental_FISDfundamental,
            c("time_to_maturity","time_to_offering"))

fwrite(TradeData_filter_masterfundamental_FISDfundamental,
       "TradeData_filter_masterfundamental_FISDfundamental.csv")

###############################################
TradeData_filter_masterfundamental_FISDfundamental<-
  fread("TradeData_filter_masterfundamental_FISDfundamental.csv")

###[4] Merge with FISD rating. 
rm(DT_bondratings)
FISD_identifier<-c("issue_id","issuer_id","issuer_cusip",
                   "issue_cusip","cusip_id","prospectus_issuer_name",
                   "issue_name","mtrty_dt","offering_dt")
DT_bondratings<-fread("Updated2020_FISD_rating_data.csv")

setnames(DT_bondratings,tolower(colnames(DT_bondratings)))
setnames(DT_bondratings,
         c("complete_cusip"),
         c("cusip_id"))
DT_bondratings[,`:=`(mtrty_dt=as.character(maturity),
                     offering_dt=as.character(offering_date),
                     rating_dt=as.character(rating_date))]

#drop duplicates by "rating_type", "rating_date" and FISD_identifier
DT_bondratings[,`:=`(count=.N,count_seq=sequence(.N)),
               by=c("rating_type","rating_date",FISD_identifier)]
table(DT_bondratings$count_seq)
DT_bondratings<-DT_bondratings[!count_seq>1]

###run codes to assign values to different ratings
source("assign_val_rating.R")

print(table(DT_bondratings$mean_rating))
print(table(DT_bondratings$max_investment_grade))

DT_bondratings<-DT_bondratings[,c(FISD_identifier,"mean_rating","mean_rating_code","max_investment_grade",
                                  "rating_dt"),
                               with=FALSE]

DT_bondratings[,`:=`(mtrty_dt_new=paste0(substr(mtrty_dt,1,4),"-",substr(mtrty_dt,5,6),"-",
                                         substr(mtrty_dt,7,8)))]
DT_bondratings[,`:=`(rating_dt_new=paste0(substr(rating_dt,1,4),"-",substr(rating_dt,5,6),"-",
                                          substr(rating_dt,7,8)))]
DT_bondratings[,`:=`(mtrty_dt=NULL)]
DT_bondratings[,`:=`(rating_dt=NULL)]
setnames(DT_bondratings,c("mtrty_dt_new"),c("mtrty_dt"))
setnames(DT_bondratings,c("rating_dt_new"),c("rating_dt"))

DT_bondratings_identifier<-as.data.table(unique(as.matrix(DT_bondratings[,c("cusip_id","mtrty_dt"),
                                                                         with=FALSE])))

setnames(DT_bondratings,
         c("rating_dt"),c("DATE"))
###make sure cusip_id+mtrty_dt+DATE (rating_dt) uniquely identfies credit rating in DT_bondratings
print(nrow(DT_bondratings))
DT_bondratings<-unique(DT_bondratings)
print(nrow(DT_bondratings))

DT_bondratings[,`:=`(count_seq=sequence(.N)),
               by=c("cusip_id","mtrty_dt","DATE")]
print(table(DT_bondratings$count_seq))

nrow(unique(as.matrix(DT_bondratings[,c("cusip_id","mtrty_dt","DATE"),
                                     with=FALSE])))

TradeData_filter_masterfundamental_FISDfundamental[,`:=`(DATE=as.character(trd_exctn_dt_s))]
nrow(TradeData_filter_masterfundamental_FISDfundamental[mtrty_dt_s.x!=mtrty_dt_s.y])
nrow(TradeData_filter_masterfundamental_FISDfundamental[is.na(mtrty_dt_s.x)|mtrty_dt_s.x==""|mtrty_dt_s.x=="NA-NA-NA"])
nrow(TradeData_filter_masterfundamental_FISDfundamental[is.na(mtrty_dt_s.y)|mtrty_dt_s.y==""|mtrty_dt_s.y=="NA-NA-NA"])

setnames(DT_bondratings,c("mtrty_dt"),
         c("mtrty_dt_s.x"))

TradeData_filter_masterfundamental_FISDfundamental[,
  `:=`(mtrty_dt_s.x=as.character(mtrty_dt_s.x))
]
DT_bondratings[,
  `:=`(mtrty_dt_s.x=as.character(mtrty_dt_s.x))
  ]

TradeData_filter_masterfundamental_FISDfundamentalrating<-
  full_join(TradeData_filter_masterfundamental_FISDfundamental,
            DT_bondratings,
            by=c("cusip_id","mtrty_dt_s.x","DATE"))

TradeData_filter_masterfundamental_FISDfundamentalrating<-
  as.data.table(TradeData_filter_masterfundamental_FISDfundamentalrating)

TradeData_filter_masterfundamental_FISDfundamentalrating<-
  TradeData_filter_masterfundamental_FISDfundamentalrating[order(cusip_id,mtrty_dt_s.x,DATE)]

nrow(TradeData_filter_masterfundamental_FISDfundamentalrating[is.na(mean_rating_code)])

class(TradeData_filter_masterfundamental_FISDfundamentalrating$mean_rating_code)
nrow(TradeData_filter_masterfundamental_FISDfundamentalrating[is.nan(mean_rating_code)])
TradeData_filter_masterfundamental_FISDfundamentalrating[is.nan(mean_rating_code),`:=`(mean_rating_code=NA)]

TradeData_filter_masterfundamental_FISDfundamentalrating[,
                                                         `:=`(mean_rating_code=na.locf(mean_rating_code,na.rm = FALSE)),by=c("cusip_id","mtrty_dt_s.x")]

nrow(TradeData_filter_masterfundamental_FISDfundamentalrating[is.na(mean_rating_code)])

TradeData_filter_masterfundamental_FISDfundamentalrating[,
                                                 mean_rating_code:=na.locf(mean_rating_code,na.rm = FALSE,fromLast = TRUE),
                                                 by=c("cusip_id","mtrty_dt_s.x")]
nrow(TradeData_filter_masterfundamental_FISDfundamentalrating[is.na(mean_rating_code)])

table(TradeData_filter_masterfundamental_FISDfundamentalrating$mean_rating)
summary(TradeData_filter_masterfundamental_FISDfundamentalrating$mean_rating_code)

TradeData_filter_masterfundamental_FISDfundamentalrating[mean_rating_code>5.5&mean_rating_code<=6.5,mean_rating:="A"]
TradeData_filter_masterfundamental_FISDfundamentalrating[mean_rating_code>4.5&mean_rating_code<=5.5,mean_rating:="A+"]
TradeData_filter_masterfundamental_FISDfundamentalrating[mean_rating_code>6.5&mean_rating_code<=7.5,mean_rating:="A-"]
TradeData_filter_masterfundamental_FISDfundamentalrating[mean_rating_code>2.5&mean_rating_code<=3.5,mean_rating:="AA"]
TradeData_filter_masterfundamental_FISDfundamentalrating[mean_rating_code>1.5&mean_rating_code<=2.5,mean_rating:="AA+"]
TradeData_filter_masterfundamental_FISDfundamentalrating[mean_rating_code>3.5&mean_rating_code<=4.5,mean_rating:="AA-"]
TradeData_filter_masterfundamental_FISDfundamentalrating[mean_rating_code<=1.5,mean_rating:="AAA"]
TradeData_filter_masterfundamental_FISDfundamentalrating[mean_rating_code>14.5&mean_rating_code<=15.5,mean_rating:="B"]
TradeData_filter_masterfundamental_FISDfundamentalrating[mean_rating_code>13.5&mean_rating_code<=14.5,mean_rating:="B+"]
TradeData_filter_masterfundamental_FISDfundamentalrating[mean_rating_code>15.5&mean_rating_code<=16.5,mean_rating:="B-"]
TradeData_filter_masterfundamental_FISDfundamentalrating[mean_rating_code>11.5&mean_rating_code<=12.5,mean_rating:="BB"]
TradeData_filter_masterfundamental_FISDfundamentalrating[mean_rating_code>10.5&mean_rating_code<=11.5,mean_rating:="BB+"]
TradeData_filter_masterfundamental_FISDfundamentalrating[mean_rating_code>12.5&mean_rating_code<=13.5,mean_rating:="BB-"]
TradeData_filter_masterfundamental_FISDfundamentalrating[mean_rating_code>8.5&mean_rating_code<=9.5,mean_rating:="BBB"]
TradeData_filter_masterfundamental_FISDfundamentalrating[mean_rating_code>7.5&mean_rating_code<=8.5,mean_rating:="BBB+"]
TradeData_filter_masterfundamental_FISDfundamentalrating[mean_rating_code>9.5&mean_rating_code<=10.5,mean_rating:="BBB-"]
TradeData_filter_masterfundamental_FISDfundamentalrating[mean_rating_code>20.5&mean_rating_code<=21.5,mean_rating:="C"]
TradeData_filter_masterfundamental_FISDfundamentalrating[mean_rating_code>19.5&mean_rating_code<=20.5,mean_rating:="CC"]
TradeData_filter_masterfundamental_FISDfundamentalrating[mean_rating_code>17.5&mean_rating_code<=18.5,mean_rating:="CCC"]
TradeData_filter_masterfundamental_FISDfundamentalrating[mean_rating_code>16.5&mean_rating_code<=17.5,mean_rating:="CCC+"]
TradeData_filter_masterfundamental_FISDfundamentalrating[mean_rating_code>18.5&mean_rating_code<=19.5,mean_rating:="CCC-"]
TradeData_filter_masterfundamental_FISDfundamentalrating[mean_rating_code>24.5&mean_rating_code<=25.5,mean_rating:="D"]
TradeData_filter_masterfundamental_FISDfundamentalrating[is.na(mean_rating_code),mean_rating:="NR"]

table(TradeData_filter_masterfundamental_FISDfundamentalrating$mean_rating)

print(nrow(TradeData_filter_masterfundamental_FISDfundamentalrating))
#bonds not exist in FINRA trade data have "Execution Date" missing
TradeData_filter_masterfundamental_FISDfundamentalrating<-
  na.omit(TradeData_filter_masterfundamental_FISDfundamentalrating,cols="trd_exctn_dt")
print(nrow(TradeData_filter_masterfundamental_FISDfundamentalrating))
print(nrow(TradeData_filter_masterfundamental_FISDfundamental))

print(table(TradeData_filter_masterfundamental_FISDfundamentalrating$mean_rating))
summary(TradeData_filter_masterfundamental_FISDfundamentalrating$mean_rating_code)
print(message(paste0("proportion of transactions of non-rated bonds is ",
                     nrow(TradeData_filter_masterfundamental_FISDfundamentalrating[is.na(mean_rating_code)])/nrow(TradeData_filter_masterfundamental_FISDfundamentalrating))))
###the reason why issuer_id, issuer_cusip, etc are NA in merged data is: we use cusip_id+mtrty_dt+DATE to emerge. 

TradeData_filter_masterfundamental_FISDfundamentalrating[,`:=`(issue_id.y=NULL,issuer_id.y=NULL,issuer_cusip.y=NULL,
                                                       issue_cusip.y=NULL,prospectus_issuer_name.y=NULL,
                                                       issue_name.y=NULL,offering_dt.y=NULL,max_investment_grade=NULL,
                                                       count_seq=NULL)]

TradeData_filter_masterfundamental_FISDfundamentalrating[,`:=`(new_bond_grade="")]
TradeData_filter_masterfundamental_FISDfundamentalrating[(mean_rating %in% c("A","A-","A+","AA","AA-",
                                                                     "AA+","AAA",
                                                                     "BBB+","BBB","BBB-")),
                                                 `:=`(new_bond_grade="I")]

TradeData_filter_masterfundamental_FISDfundamentalrating[(mean_rating %in% c("B","B-","B+","BB","BB-","BB+",
                                                                             "C","CC","CCC","CCC-","CCC+","D")),
                                                 `:=`(new_bond_grade="H")]

TradeData_filter_masterfundamental_FISDfundamentalrating[new_bond_grade=="",
                                                 `:=`(new_bond_grade="NR")]

table(TradeData_filter_masterfundamental_FISDfundamentalrating$new_bond_grade)
nrow(TradeData_filter_masterfundamental_FISDfundamentalrating[new_bond_grade!="NR"&mean_grade!=new_bond_grade])
table(TradeData_filter_masterfundamental_FISDfundamentalrating[new_bond_grade=="NR",c("mean_grade"),with=FALSE])
TradeData_filter_masterfundamental_FISDfundamentalrating[,`:=`(new_bond_grade2=new_bond_grade)]
TradeData_filter_masterfundamental_FISDfundamentalrating[new_bond_grade2=="NR",`:=`(new_bond_grade2=mean_grade)]
table(TradeData_filter_masterfundamental_FISDfundamentalrating$new_bond_grade2)
table(TradeData_filter_masterfundamental_FISDfundamentalrating$mean_grade)

fwrite(TradeData_filter_masterfundamental_FISDfundamentalrating,
       "TradeData_filter_masterfundamental_FISDfundamentalrating.csv")

###############################################
TradeData_filter_masterfundamental_FISDfundamentalrating_mostrecentFISDrating<-
  fread("TradeData_filter_masterfundamental_FISDfundamentalrating.csv")

###[5] Exclude unusural bonds
FISD_fundamentals<-fread("Updated2020_FISD_Issue.csv")

#setwd("~/Corporate_Bond_TRACE/data/TRACE_standard")
setnames(FISD_fundamentals,tolower(colnames(FISD_fundamentals)))
setnames(FISD_fundamentals,
         c("complete_cusip"),
         c("cusip_id"))

###FISD unique by cusip_id and maturity or simply cusip_id
FISD_fundamentals[,`:=`(count_seq=sequence(.N)),
                  by=c("cusip_id","maturity")]
table(FISD_fundamentals$count_seq)

FISD_fundamentals[,`:=`(mtrty_dt_s=as.Date(paste0(substr(as.character(maturity),1,4),"-",
                                                  substr(as.character(maturity),5,6),"-",
                                                  substr(as.character(maturity),7,8))))]

setcolorder(FISD_fundamentals,c("cusip_id","amount_outstanding","offering_amt","offering_date","maturity","mtrty_dt_s"))

FISD_fundamentals_variablelist<-
  c("cusip_id", "bond_type",
    "putable", "private_placement","unit_deal", "exchangeable","redeemable","fungible",
    "preferred_security",
    "coupon_type")

FISD_fundamentals<-
  FISD_fundamentals[,c(FISD_fundamentals_variablelist),with=FALSE]

Trial<-
  left_join(TradeData_filter_masterfundamental_FISDfundamentalrating_mostrecentFISDrating,
            FISD_fundamentals,
            by=c("cusip_id"))

table(Trial$putable)
table(Trial$exchangeable)
table(Trial$convertible)
Trial<-as.data.table(Trial)
nrow(Trial[bond_type %in%
             c("ABS","C10Y","C1Y","C2Y","C30Y","C3M","C5Y","C6M",
               "CCUR","CTBD","CTBL","EBON","EMTN","FGOV","FGS",
               "MBS","MUNI","O10Y","O13W","O26W","O2Y","O30Y",
               "O3Y","O4W","O52W","O5Y","O7Y","PS","PSTK","TPCS",
               "TXMU","USBD","USBL","USNT","USSI","USSP","USTC")])
table(Trial$rule_144a)
table(Trial$ind_144a)
table(Trial$private_placement)
table(Trial$preferred_security)
table(Trial$coupon_type)


nrow(Trial[putable=="Y"|
             exchangeable=="Y"|
             convertible=="Y"|rule_144a=="Y"|preferred_security=="Y"|coupon_type=="V"|
             (bond_type %in%
                c("ABS","C10Y","C1Y","C2Y","C30Y","C3M","C5Y","C6M",
                  "CTBD","CTBL","EBON","EMTN","FGOV","FGS",
                  "MBS","MUNI","O10Y","O13W","O26W","O2Y","O30Y",
                  "O3Y","O4W","O52W","O5Y","O7Y","PS","PSTK","TPCS",
                  "TXMU","USBD","USBL","USNT","USSI","USSP","USTC"))])/nrow(Trial)

print(nrow(Trial))
Trial<-
  Trial[(!(putable=="Y"))|(is.na(putable))|(putable=="")]
print(nrow(Trial))
Trial<-
  Trial[(!(exchangeable=="Y"))|(is.na(exchangeable))|(exchangeable=="")]
print(nrow(Trial))
Trial<-
  Trial[(!(rule_144a=="Y"))|(is.na(rule_144a))|(rule_144a=="")]
print(nrow(Trial))
Trial<-
  Trial[(!(preferred_security=="Y"))|(is.na(preferred_security))|(preferred_security=="")]
print(nrow(Trial))
Trial<-
  Trial[(!(coupon_type=="V"))|(is.na(coupon_type))|(coupon_type=="")]
print(nrow(Trial))
Trial<-
  Trial[(!(bond_type %in%
             c("ABS","C10Y","C1Y","C2Y","C30Y","C3M","C5Y","C6M",
               "CTBD","CTBL","EBON","EMTN","FGOV","FGS",
               "MBS","MUNI","O10Y","O13W","O26W","O2Y","O30Y",
               "O3Y","O4W","O52W","O5Y","O7Y","PS","PSTK","TPCS",
               "TXMU","USBD","USBL","USNT","USSI","USSP","USTC")))|(is.na(bond_type))|(bond_type=="")]
print(nrow(Trial))



fwrite(Trial,
       "Data_FilterMerge_standard_TRACE.csv")






